#' ---
#' title: Reproduce Analyses in Section 3.3 (organizations merge)
#' author: Joe Ornstein
#' date: 2025-07-06
#' version: 0.2
#' ---

# Start from a clean slate: drop every (non-hidden) object currently in the
# workspace so nothing left over from earlier code leaks into this analysis.
rm(list = ls())

# Section banner for the console / rendered output.
cat("\n\n**Application 3: Linking Organization Names**\n\n")

library(tidyverse)

## Load merged dataset and labeled record pairs -------------

# merged dataset; load() restores the saved objects into the workspace
# (the code below reads `df` and `df_exact` from it)
load("data/organizations-merge/bonica_fuzzylink.RData")

# organizations with an exact match in the DIME dataset
num_exact_matches <- df_exact$orgname %>%
  unique() %>%
  length()

# organizations matched by fuzzylink with no exact matches in DIME dataset:
# take the names in A that received some match in B, then drop (and
# de-duplicate via setdiff) every name that already has an exact match
num_fuzzy_matches <- df$A[!is.na(df$B)] %>%
  setdiff(df_exact$orgname) %>%
  length()

cat(
  "Unique Organizations Identified:",
  num_exact_matches + num_fuzzy_matches,
  "\n"
)

# dataset of unique record pairs from merged dataset (not including exact matches)
organizations_labeled <- read_csv(
  "data/organizations-merge/bonica_fuzzylink_labeled.csv",
  progress = FALSE
)

# Precision = share of labeled record pairs whose `match` label is one of the
# accepted values ('Yes', 'yes', 'Chapter'). `%in%` never returns NA, so a
# missing label counts as a non-match instead of turning the mean into NA.
# The trailing "\n" terminates the output line so that whatever is printed
# next starts on a fresh line.
cat(
  "Precision (fuzzylink):",
  mean(organizations_labeled$match %in% c("Yes", "yes", "Chapter")),
  "\n"
)
